# Put all necessary libraries here
library(tidyverse)
library(reprex)
library(dplyr)
library(infer)
library(moderndive)
library("styler")
library(lubridate)
library(ggthemes)
library(svglite)
library(DT)
library(shiny)
library(tidycensus)
library(ggmap)
library(leaflet)
library(tidycensus)
library(gganimate)
library(pdxTrees)
library(rvest)
library(httr)
Note: You should upload the final version to your Math 241 GitHub repo that I created for you, not Gradescope. We will grade this lab directly from your repo.
ggmap and interactive maps with leaflet.gganimate.For this problem we will return to the SE Portland 2018 car crash dataset.
pdx_crash_2018 <- read_csv("/home/courses/math241s21/Data/pdx_crash_2018_page1.csv")
LONGTD_DD, LAT_DD). Paste that code here to recreate that graph.ggplot(data = pdx_crash_2018,
mapping = aes(
x = LONGTD_DD,
y = LAT_DD,
color = "#4C4CFD")) +
geom_point(alpha = 0.3)
box <- c(bottom = 45.45, left = -122.7, top = 45.54, right = -122.45)
reed <- get_stamenmap(box, maptype = "toner", zoom = 12)
reed %>%
ggmap() +
geom_point(aes(x = LONGTD_DD, y = LAT_DD), data = pdx_crash_2018,
color = "red", size = 3)
#leaflet(options = leafletOptions(minZoom = 11, maxZoom = 13.5)) %>%
leaflet() %>%
setView(lng = -122.58, lat = 45.495, zoom = 11) %>%
addTiles() %>%
addCircleMarkers(lng = ~LONGTD_DD, lat = ~LAT_DD,
data = pdx_crash_2018, radius = 1,
opacity = 0.2)
day_time with categories:pdx_time <- pdx_crash_2018 %>%
filter(CRASH_HR_NO != 99) %>%
mutate(CRASH_HR_NO = as.numeric(CRASH_HR_NO),
day_time = case_when(CRASH_HR_NO > 5 & CRASH_HR_NO <= 12 ~ "Morning",
CRASH_HR_NO > 12 & CRASH_HR_NO <= 16 ~ "Afternoon",
CRASH_HR_NO > 16 & CRASH_HR_NO <= 20 ~ "Evening",
CRASH_HR_NO > 20 & CRASH_HR_NO <= 23 ~ "Night",
CRASH_HR_NO <= 5 ~ "Night"),
day_time = as.factor(day_time),
CRASH_TIME = fct_relevel(day_time, "Night", "Evening", "Afternoon", "Morning")
)
factpal <- colorFactor("PuOr", pdx_time$day_time)
pdx_time %>%
leaflet() %>%
setView(lng = -122.58, lat = 45.495, zoom = 11) %>%
addTiles() %>%
addCircleMarkers(lng = ~LONGTD_DD, lat = ~LAT_DD,
data = pdx_time,
color = ~factpal(day_time),
stroke = FALSE, fillOpacity = 0.5,
radius = 3.5) %>%
addLegend("bottomright", pal = factpal,
values = ~day_time, title = "Crash",
opacity = 1)
Add this variable to your interactive map using color. Make sure to include a legend and be mindful of your color palette choice. How do crash locations vary by parts of the day?
pdx_crash_2018 <- pdx_crash_2018 %>%
mutate(CRASH_DT = str_remove(CRASH_DT, "00:00:00"))
date_crash <- paste("Date of Crash:", pdx_crash_2018$CRASH_DT)
pdx_time %>%
leaflet() %>%
setView(lng = -122.55, lat = 45.5, zoom = 12) %>%
addTiles() %>%
addCircleMarkers(lng = ~LONGTD_DD, lat = ~LAT_DD,
data = pdx_time, color = ~factor(day_time),
stroke = FALSE, fillOpacity = 0.5,
radius = 3.5, popup = date_crash)
sev1 <- pdx_crash_2018 %>%
mutate(severity = case_when(
CRASH_SVRTY_CD == 2 ~ "fatal")) %>%
filter(severity == "fatal")
sev2 <- pdx_crash_2018 %>%
mutate(severity = case_when(
CRASH_SVRTY_CD == 4 ~ "injury")) %>%
filter(severity == "injury")
sev3 <- pdx_crash_2018 %>%
mutate(severity = case_when(
CRASH_SVRTY_CD == 5 ~ "property damage")) %>%
filter(severity == "property damage")
leaflet() %>%
setView(lng = -122.55, lat = 45.5, zoom = 12) %>%
addTiles() %>%
addCircleMarkers(lng = ~LONGTD_DD, lat = ~LAT_DD,
data = sev1, color = "red",
stroke = FALSE, fillOpacity = 0.5,
radius = 3.5, popup = date_crash, group = "fatal") %>%
addCircleMarkers(lng = ~LONGTD_DD, lat = ~LAT_DD,
data = sev2, color = "blue",
stroke = FALSE, fillOpacity = 0.5,
radius = 3.5, popup = date_crash, group = "injury") %>%
addCircleMarkers(lng = ~LONGTD_DD, lat = ~LAT_DD,
data = sev3, color = "green",
stroke = FALSE, fillOpacity = 0.5,
radius = 3.5, popup = date_crash, group = "property damage") %>%
addLayersControl(overlayGroups = c("fatal", "injury","property damage"))
Looks like fatal crashes occur near the highway and freeway while property damage incidents are concentrated downtown and the major roads. g. Let’s go back to the static map and this time change our geom. Use geom_density2d() instead of geom_point(). Interpret what this map tells us about car crashes in the SE and compare the story to the map using geom_point().
box <- c(bottom = 45.45, left = -122.7, top = 45.54, right = -122.45)
reed <- get_stamenmap(box, maptype = "toner", zoom = 12)
reed %>%
ggmap() +
geom_density2d(aes(x = LONGTD_DD, y = LAT_DD), data = pdx_crash_2018,
color = "red")
Compared to the static map, the number of rings in a particular location indicates more frequent accidents in the location. We can see that near highways and near downtown there are more accidents compared to other areas.
day_time. Does the distribution on accidents seem to vary much by part of day?reed %>%
ggmap() +
geom_density2d(data = pdx_time, aes(x = LONGTD_DD, y = LAT_DD),
inherit.aes = FALSE,
color = "red") +
facet_grid(day_time ~ .) +
theme_minimal()
Areas with high accident distribution do not change. ### Problem 2: Choropleth Maps
For this problem, I want you to practice creating choropleth maps. Let’s grab some data using tidycensus. Remember that you will have to set up an API key.
api_key <- "6413ba5315708be54be79c9d03ead7b0a4b29d3a"
library(tidycensus)
B25064_001) from the American Community Survey for Multnomah county, Oregon. I want you to do data pulls at three geography resolutions: county subdivision, tract, and block group.rent_county <- get_acs(geography = "county subdivision",
variables = "B25064_001",
county = "multnomah",
state = "Oregon",
geometry = TRUE,
key = api_key,
cache_table = TRUE)
##
|
| | 0%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======== | 11%
|
|======== | 12%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 28%
|
|====================== | 32%
|
|======================== | 34%
|
|=========================== | 39%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|=========================================================== | 85%
|
|======================================================================| 100%
tract_county <- get_acs(geography = "tract",
variables = "B25064_001",
county = "multnomah",
state = "Oregon",
geometry = TRUE,
key = api_key,
cache_table = TRUE)
##
|
| | 0%
|
|======================================================================| 100%
block_county <- get_acs(geography = "block group",
variables = "B25064_001",
county = "multnomah",
state = "Oregon",
geometry = TRUE,
key = api_key,
cache_table = TRUE)
##
|
| | 0%
|
|============================================================= | 86%
|
|======================================================================| 100%
ggplot(data = rent_county, mapping = aes(geometry = geometry, fill = estimate)) +
geom_sf() +
coord_sf() +
scale_fill_viridis_c(direction = -1) +
theme_void()
ggplot(data = tract_county, mapping = aes(geometry = geometry, fill = estimate)) +
geom_sf() +
coord_sf() +
scale_fill_viridis_c(direction = -1) +
theme_void()
ggplot(data = block_county, mapping = aes(geometry = geometry, fill = estimate)) +
geom_sf() +
coord_sf() +
scale_fill_viridis_c(direction = -1) +
theme_void()
Tract seems to be the vest resolution. block_county is missing a lot data on gross rent and the divisions are small. The county subdivision only has five gross rent divisions which does not accurately represent Multnomah county. tract_county is the best because it has mostly complete data and the subdivisions do are large enough for the viewer to identify the area.
pal4 <- colorFactor(palette = "viridis", domain = rent_county$estimate)
rent_county %>%
sf::st_transform(crs = "+init=epsg:4326") %>%
leaflet() %>%
addTiles() %>%
addPolygons(popup = ~estimate, fillColor = ~pal4(estimate),
stroke = FALSE, fillOpacity = 0.9) %>%
addLegend("bottomright", pal = pal4,
values = ~estimate, title = "Median Income",
opacity = 1)
Let’s take a static plot we made earlier in the semester and add some animation.
.
grad_rate <- "https://www.reed.edu/ir/gradrateshist.html"
rate_table <- grad_rate %>%
read_html() %>%
html_nodes(css = "table")
grad_rate_table <- html_table(rate_table[[1]], fill = TRUE)
colnames(grad_rate_table) <- c("Year", "Cohort_size", "gradfour", "gradfive", "gradsix")
grad_rate_table <- grad_rate_table %>%
slice(-1)
gradratetable <- grad_rate_table %>%
mutate(Grad_year = as.numeric(Year),
cohort = parse_number(Cohort_size),
four = parse_number(gradfour),
five = parse_number(gradfive),
six = parse_number(gradsix)
) %>%
dplyr::select(Grad_year, cohort, four, five, six)
grad_rate_table_final <- pivot_longer(gradratetable, cols = c(four, five, six),
names_to = "Years to Graduate",
values_to = "Graduation Rate") %>%
select(Grad_year, cohort, `Years to Graduate`, `Graduation Rate`)
static <- ggplot(grad_rate_table_final, aes(fill = `Years to Graduate`, y = `Graduation Rate`, x = Grad_year)) +
geom_bar(stat = "identity") +
facet_grid(. ~ `Years to Graduate`) +
theme_bw() +
labs(x = "Graduation Year")
animation <- static +
transition_manual(Grad_year, cumulative = TRUE)
#transition_reveal(along = Grad_year)
animate(animation, fps = 10, end_pause = 20)
It really shows the growth over time for each group and learn which groups stay relatively consistent over time(people who graduate in six years). Depending the preferences of the viewer, animation made the plot more difficult compare the graduation rates as there is no static point in the animation.
pdxTrees dataset, create two leaflet maps. I want you to get creative and really dig into the functionalities of leaflet. Considerparks <- get_pdxTrees_parks()
parks <- parks %>%
filter(Common_Name == c("Douglas-Fir", "Bigleaf Maple", "American Sycamore", "American Elm", "Austrian Black Pine"))
pal3 <- colorFactor(palette = "viridis", domain = parks$Common_Name)
parks %>%
leaflet() %>%
addTiles() %>%
addCircleMarkers(lng = ~ Longitude, lat = ~Latitude,
data = parks,
stroke = FALSE,
fillOpacity = 0.6,
color = ~pal3(Common_Name)) %>%
addLegend("bottomright", pal = pal3,
values = ~Common_Name, title = "Tree",
opacity = 1) %>%
setView(lng = -122.629383, lat = 45.481245, zoom = 11)
State the key takeaways that we can learn from your maps. The graph can show the viewer how diverse the species of trees are in parks. It also can tell us where the trees are located.
pdxTrees dataset, create an animated graph. Again, think carefully about the various animation features. In particular, considerpdxpal <- colorFactor("Tree Common Name:", parks$Common_Name)
name <- paste("Tree Common Name:", parks$Common_Name)
something <- parks %>%
mutate(Condition = fct_relevel(Condition, "Good", "Fair", "Poor")) %>%
ggplot(data = parks, mapping = aes(x = Longitude, y = Latitude, color = Condition)) +
geom_point() +
transition_manual(Inventory_Date, cumulative = TRUE)
animate(something, fps = 5, end_pause = 20)
State the key takeaways that we can learn from your animated graph. Also address whether or not you think the animation helps or hinders the delivery of these key takeaways. I was trying to show tree condition over time but wasn’t able to show that. I think it hinders the key takeaway due to the map not being portrayed.